library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.6 ✔ purrr 0.3.4
✔ tibble 3.1.7 ✔ dplyr 1.0.9
✔ tidyr 1.2.0 ✔ stringr 1.4.0
✔ readr 2.1.2 ✔ forcats 0.5.1
── Conflicts ─────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(scales)
Attaching package: ‘scales’
The following object is masked from ‘package:purrr’:
discard
The following object is masked from ‘package:readr’:
col_factor
library(plotly)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(lubridate)
Attaching package: ‘lubridate’
The following objects are masked from ‘package:base’:
date, intersect, setdiff, union
library(sf)
Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(zoo)
Attaching package: ‘zoo’
The following objects are masked from ‘package:base’:
as.Date, as.Date.numeric
#load in beds and add year column
beds <- read_csv("raw_data/non_covid_raw_data/beds_by_nhs_board_of_treatment_and_specialty.csv") %>% janitor::clean_names()
Warning: One or more parsing issues, see `problems()` for detailsRows: 30458 Columns: 20── Column specification ─────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (10): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, Specialty, SpecialtyQF, Special...
dbl (5): AllStaffedBeddays, TotalOccupiedBeddays, AverageAvailableStaffedBeds, AverageOccupi...
lgl (5): AllStaffedBeddaysQF, TotalOccupiedBeddaysQF, AverageAvailableStaffedBedsQF, Average...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
beds <- beds %>%
mutate(quarter_date = yq(quarter),
year = year(quarter_date),
.after = quarter)
beds %>%
write_csv("clean_data/non_covid_data/beds.csv")
# bed percentage availablity for "all acute"
# Will need to add filter for year based on user input
beds_plotly <- beds %>%
filter(specialty_name == "All Acute") %>%
group_by(quarter, specialty_name) %>%
summarise(mean_perc_occ = mean(percentage_occupancy)) %>%
ggplot(aes(x = quarter, y = mean_perc_occ))+
geom_line(aes(colour = specialty_name, group = specialty_name))+
geom_point(aes(
text = paste0("Occupancy:",round(mean_perc_occ, digits = 2)," %\n", quarter)),
size = 0.5)+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(title = "Mean bed availability for all Acute Patients",
x = "\nYear and Quarter",
y = "Average Percentage Occupancy")
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.Warning: Ignoring unknown aesthetics: text
ggplotly(beds_plotly, tooltip = "text") %>%
config(displayModeBar = FALSE) %>%
layout(hoverlabel=list(bgcolor="white"))
NA
NA
ae_wait_times <- read_csv("raw_data/non_covid_raw_data/monthly_ae_waitingtimes_202206.csv") %>% janitor::clean_names()
Rows: 15837 Columns: 25── Column specification ─────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Country, HBT, TreatmentLocation, DepartmentType, NumberOfAttendancesEpisodeQF, Numb...
dbl (12): Month, NumberOfAttendancesAggregate, NumberOfAttendancesEpisode, NumberMeetingTarge...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#make a date and year column with the first date of every month
ae_wait_times <- ae_wait_times %>%
mutate(date = ym(month), .after = month,
year = year(date))
#make a percent column with percent of patients meeting the 4hr target time
ae_wait_times <- ae_wait_times %>%
mutate(percent_4hr_target_achieved = (number_meeting_target_aggregate/number_of_attendances_aggregate)*100)
ae_wait_times %>%
write_csv("clean_data/non_covid_data/ae_wait_times.csv")
#write in the target data for the shapefile colours
target_2007 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2007) %>%
rename(ae_target_2007 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2007)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2008 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2008) %>%
rename(ae_target_2008 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2008)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2009 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2009) %>%
rename(ae_target_2009 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2009)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2010 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2010) %>%
rename(ae_target_2010 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2010)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2011 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2011) %>%
rename(ae_target_2011 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2011)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2012 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2012) %>%
rename(ae_target_2012 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2012)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2013 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2013) %>%
rename(ae_target_2013 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2013)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2014 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2014) %>%
rename(ae_target_2014 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2014)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2015 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2015) %>%
rename(ae_target_2015 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2015)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2016 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2016) %>%
rename(ae_target_2016 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2016)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2017 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2017) %>%
rename(ae_target_2017 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2017)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2018 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2018) %>%
rename(ae_target_2018 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2018)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2019 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2019) %>%
rename(ae_target_2019 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2019)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2020 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2020) %>%
rename(ae_target_2020 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2020)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
target_2021 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2021) %>%
rename(ae_target_2021 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2021)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
#scotland_smaller$Shape_Area <- as.character(scotland_smaller$Shape_Area)
st_write(scotland_smaller, "clean_data/shapefile/scotland_smaller.gpkg", append = FALSE)
Warning: Dropping column(s) centres of class(es) sfc_POINT;sfc
Deleting layer `scotland_smaller' using driver `GPKG'
Writing layer `scotland_smaller' to data source
`clean_data/shapefile/scotland_smaller.gpkg' using driver `GPKG'
Writing 14 features with 21 fields and geometry type Multi Polygon.
simd <- read_csv("raw_data/non_covid_raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_and_simd.csv") %>% janitor::clean_names()
Rows: 40821 Columns: 18── Column specification ─────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (11): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType, AdmissionTypeQF,...
dbl (7): SIMD, Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, LengthOfStay, Avera...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
simd <- simd %>%
mutate(quarter_date = yq(quarter),
year = year(quarter_date),
.after = quarter)
simd %>%
write_csv("clean_data/non_covid_data/simd.csv")
# average episodes by SIMD value
# currently unfiltered for health board or admission type etc
simd_plotly <- simd %>%
drop_na(simd) %>%
mutate(simd = as.factor(simd)) %>% # gives each simd a separate colour
group_by(quarter, simd) %>%
summarise(avg_episodes = mean(episodes, na.rm = TRUE)) %>%
ggplot(aes(x = quarter, y = avg_episodes, group = simd))+
geom_line(aes(colour = simd))+
geom_point(size = 0.5)+
scale_y_continuous(labels = scales::comma)+
labs(title = "Average Hospital Episodes by SIMD Deprevation score\n",
x = "\nYear and Quarter",
y = "Average Episodes\n")+
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.
ggplotly(simd_plotly) %>%
config(displayModeBar = FALSE)
age_sex <- read_csv("raw_data/non_covid_raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_age_and_sex.csv") %>% janitor::clean_names()
Rows: 129393 Columns: 18── Column specification ─────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType, AdmissionTypeQF,...
dbl (6): Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, LengthOfStay, AverageLeng...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
age_sex <- age_sex %>%
mutate(quarter_date = yq(quarter),
year = year(quarter_date),
.after = quarter)
age_sex %>%
write_csv("clean_data/non_covid_data/age_sex.csv")
# Average number of episode for age groups
# currently unfiltered by department or anything else
age_plotly <- age_sex %>%
#filter(min_date < year & year < max_date) %>%
group_by(quarter, age) %>%
summarise(avg_episodes = mean(episodes, na.rm = TRUE)) %>%
ggplot(aes(x = quarter, y = avg_episodes))+
geom_line(aes(colour = age, group = age))+
geom_point(size = 0.5)+
labs(title = "Average Hospital Episodes by Age Groups\n",
x = "\nYear and Quarter",
y = "Average Episodes\n")+
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.
ggplotly(age_plotly) %>%
config(displayModeBar = FALSE)
sex_plotly <- age_sex %>%
group_by(quarter, sex) %>%
summarise(avg_length_of_episode = mean(average_length_of_episode, na.rm = TRUE)) %>%
ggplot(aes(x = quarter, y = avg_length_of_episode))+
geom_line(aes(colour = sex, group = sex))+
#geom_smooth(aes(colour = sex, group = sex), se = FALSE)+
geom_point(size = 0.5)+
labs(title = "Average Hospital Episodes by Gender\n",
x = "\nYear and Quarter",
y = "Average Episodes\n")+
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.
ggplotly(sex_plotly, hovertemplate = ) %>%
config(displayModeBar = FALSE)
ae_wait_times <- read_csv("raw_data/non_covid_raw_data/monthly_ae_waitingtimes_202206.csv") %>% janitor::clean_names()
Rows: 15837 Columns: 25── Column specification ───────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Country, HBT, TreatmentLocation, DepartmentType, NumberOfAttendancesEpisodeQF, Nu...
dbl (12): Month, NumberOfAttendancesAggregate, NumberOfAttendancesEpisode, NumberMeetingTar...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#make a date and year column with the first date of every month
ae_wait_times <- ae_wait_times %>%
mutate(date = ym(month), .after = month,
year = year(date))
#make a percent column with percent of patients meeting the 4hr target time
ae_wait_times <- ae_wait_times %>%
mutate(percent_4hr_target_achieved = (number_meeting_target_aggregate/number_of_attendances_aggregate)*100)
ae_wait_times %>%
write_csv("clean_data/non_covid_data/ae_wait_times.csv")